Clothing ID 862 Product Review Summary

Column

Wordcloud of the Review

Age Distribution of Customers

Column {data-height=1000}

Bigram

Trigram

Clothing ID 1078 Product Review Summary

Column

Wordcloud of the Review

Age Distribution of Customers

Column {data-height=1000}

Bigram

Trigram

Clothing ID 1094 Product Review Summary

Column

Wordcloud of the Review

Age Distribution of Customers

Column {data-height=1000}

Bigram

Trigram

Clothing ID 1081 Product Review Summary

Column

Wordcloud of the Review

Age Distribution of Customers

Column {data-height=1000}

Bigram

Trigram

Clothing ID 872 Product Review Summary

Column

Wordcloud of the Review

Age Distribution of Customers

Column {data-height=1000}

Bigram

Trigram

---
title: "NLP Analysis on the Customer Reviews of Top 5 Products"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    source_code: embed
    theme: sandstone
    
---

```{r setup, include=FALSE}
library(flexdashboard)
library(ggplot2)
library(plotly)
library(rJava)
library(dplyr)
library(tidytext)
library(tm)
library(ggplot2)
library(reshape2)
library(wordcloud)
library(RWeka)
library(qdap)
library(readr)
library(stringr)
library(pdfsearch)
library(h2o)
library(Rtsne)
library(wordcloud2)
library(ggthemes)
library(highcharter)
library(topicmodels)
library(plotly)
 library(ivmte)
```


```{r}

#' Load the reviews of one product and return them as a cleaned text corpus.
#'
#' Reads the reviews CSV, renames its 11 columns, drops rows with a missing
#' review text, keeps the rows whose clothing ID matches `product_id`, and
#' turns the review texts into a `tm` corpus that is lower-cased and stripped
#' of stopwords, punctuation and digits.
#'
#' @param product_id Clothing ID (or vector of IDs) whose reviews are kept.
#' @param path Path to the reviews CSV. Defaults to the location the dashboard
#'   was originally built against, so existing calls keep working.
#' @return A `VCorpus` with one document per retained review.
review_reader <- function(
    product_id,
    path = "C:/Users/somang.han/Downloads/womens-ecommerce-clothing-reviews/Womens Clothing E-Commerce Reviews.csv") {
  doc <- read_csv(path, col_names = TRUE)
  colnames(doc) <- c(
    "num_comments", "clothing_id", "age", "title", "review_text", "rating",
    "recommended_ind", "positive_feedback_count", "division_name",
    "department_name", "class_name"
  )

  # Drop reviews without text, then keep only the requested product.
  # `%in%` never yields NA, so rows with a missing clothing_id cannot leak in
  # as all-NA rows the way they do with `==`.
  doc <- doc[complete.cases(doc$review_text), ]
  small_doc <- doc[doc$clothing_id %in% product_id, ]
  corpus <- VCorpus(VectorSource(small_doc$review_text))

  # Insert a space between a digit and a directly following letter so that
  # removing the digits afterwards does not glue the letters to the next word.
  sep_numbers <- function(x) {
    gsub("([0-9])([a-zA-Z])", "\\1 \\2", x)
  }

  # Project-specific additions to tm's default English stopword list.
  extra_stopwords <- c(
    "s", "ve", "I", "I'm", "'m", "'", "you", "your", "were", "etc", "such",
    "either", "yes", "dont", "however", "also", "e", "d", "she", "didn", "he",
    "could", "couldn", "which", "will", "had", "did", "when", "doesn't",
    "does", "because", "u", "me", "it's", "its", "ll", "still", "nor", "am",
    "ax", "i", "edu", "t", "m", "subject", "can", "lines", "re", "what",
    "there", "all", "we", "one", "the", "this", "org", "of", "or", "in",
    "for", "by", "on", "'ll", "but", "is", "a", "an", "with", "as", "was",
    "if", "they", "are", "and", "it", "from", "at", "my", "be", "not", "that",
    "to", "com", "don't", "so", "has", "hasn't", "haven't", "have not"
  )
  stop_terms <- unique(tolower(c(stopwords(), extra_stopwords)))

  # removeWords() is case-sensitive, so the text is lower-cased first;
  # otherwise capitalised stopwords ("The", "This", "It", ...) would survive
  # the filter and show up in the word cloud and n-gram charts.
  corpus %>%
    tm_map(content_transformer(tolower)) %>%
    tm_map(removeWords, stop_terms) %>%
    tm_map(removePunctuation) %>%
    tm_map(content_transformer(sep_numbers)) %>%
    tm_map(removeNumbers)
}


#' Load the raw (uncleaned) review rows of one product.
#'
#' Reads the reviews CSV, renames its 11 columns, drops rows with a missing
#' review text and returns the rows whose clothing ID matches `product_id`.
#'
#' @param product_id Clothing ID (or vector of IDs) whose reviews are kept.
#' @param path Path to the reviews CSV. Defaults to the location the dashboard
#'   was originally built against, so existing calls keep working.
#' @return A data frame (as returned by `read_csv()`) holding the matching
#'   rows with all 11 renamed columns.
original_review <- function(
    product_id,
    path = "C:/Users/somang.han/Downloads/womens-ecommerce-clothing-reviews/Womens Clothing E-Commerce Reviews.csv") {
  doc <- read_csv(path, col_names = TRUE)
  colnames(doc) <- c(
    "num_comments", "clothing_id", "age", "title", "review_text", "rating",
    "recommended_ind", "positive_feedback_count", "division_name",
    "department_name", "class_name"
  )

  # Drop reviews without text, then keep only the requested product.
  # `%in%` never yields NA, so rows with a missing clothing_id are excluded
  # instead of coming back as all-NA rows.
  doc <- doc[complete.cases(doc$review_text), ]
  doc[doc$clothing_id %in% product_id, ]
}


# Bar chart of the 15 most frequent two-word sequences in a corpus.
#
# clean_document: a tm corpus, e.g. the value returned by review_reader().
# Returns the highcharter chart produced by hchart().
bi_gram_out <- function(clean_document) {
  # Tokenizer that emits exactly two consecutive words per term.
  pair_tokenizer <- function(x) {
    NGramTokenizer(x, Weka_control(min = 2, max = 2))
  }

  pair_tdm <- TermDocumentMatrix(
    clean_document,
    control = list(tokenize = pair_tokenizer)
  )

  # Total count of every bigram across all documents, most frequent first.
  pair_counts <- sort(rowSums(as.matrix(pair_tdm)), decreasing = TRUE)
  pair_table <- data.frame(word = names(pair_counts), freq = pair_counts)
  top_pairs <- head(pair_table, 15)

  top_pairs %>%
    hchart(type = "bar", hcaes(x = word, y = freq)) %>%
    hc_title(text = "Top 15 Frequent Pair Words") %>%
    hc_colors("orange") %>%
    hc_add_theme(hc_theme_flat())
}


# Bar chart of the 15 most frequent three-word sequences in a corpus.
#
# clean_document: a tm corpus, e.g. the value returned by review_reader().
# Returns the highcharter chart produced by hchart().
tri_gram_out <- function(clean_document) {
  # Tokenizer that emits exactly three consecutive words per term.
  triple_tokenizer <- function(x) {
    NGramTokenizer(x, Weka_control(min = 3, max = 3))
  }

  triple_tdm <- TermDocumentMatrix(
    clean_document,
    control = list(tokenize = triple_tokenizer)
  )

  # Total count of every trigram across all documents, most frequent first.
  triple_counts <- sort(rowSums(as.matrix(triple_tdm)), decreasing = TRUE)
  triple_table <- data.frame(word = names(triple_counts), freq = triple_counts)
  top_triples <- head(triple_table, 15)

  top_triples %>%
    hchart(type = "bar", hcaes(x = word, y = freq)) %>%
    hc_title(text = "Top 15 Frequent Three Consecutive Words") %>%
    hc_colors("green") %>%
    hc_add_theme(hc_theme_flat())
}


#' Draw a word cloud of the most frequent terms in a corpus.
#'
#' Builds a term-document matrix from `data`, sums each term's count over all
#' documents and plots the terms with `wordcloud()`.
#'
#' @param data A tm corpus, e.g. the value returned by `review_reader()`.
#' @param min_freq Terms occurring fewer than this many times are not drawn.
#'   Defaults to 20, the threshold the dashboard has always used; lower it for
#'   products with few reviews.
#' @param max_words Maximum number of terms drawn. Defaults to 90.
#' @return The value of `wordcloud()`; the function is called for the plot it
#'   draws on the active graphics device.
word_cloud_fun <- function(data, min_freq = 20, max_words = 90) {
  # Punctuation is stripped here as well, so the function also works on a
  # corpus that has not been through review_reader().
  mycorpus <- tm_map(data, removePunctuation)
  term_matrix <- as.matrix(TermDocumentMatrix(mycorpus))

  # Overall frequency of every term, most frequent first.
  term_counts <- sort(rowSums(term_matrix), decreasing = TRUE)
  d <- data.frame(word = names(term_counts), freq = term_counts)

  # Alternative renderer that was tried and left disabled:
  # wordcloud2(d, size=1.5, color='random-light', backgroundColor="black")

  wordcloud(
    words = d$word,
    freq = d$freq,
    min.freq = min_freq,
    max.words = max_words,
    random.order = FALSE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
}


#' Pie chart of the ten most common customer ages.
#'
#' Tabulates `data$age`, converts each age's count into a percentage of all
#' tabulated reviews (rounded to one decimal), keeps the ten ages with the
#' highest share and plots them as a highcharter pie chart.
#'
#' @param data A data frame with an `age` column, e.g. the value returned by
#'   `original_review()`.
#' @return A highchart object.
age_graph <- function(data) {
  age_counts <- as.data.frame(table(data$age))
  colnames(age_counts) <- c("age", "value")

  # Vectorised share computation. The previous row-wise apply() coerced the
  # data frame to a character matrix and recomputed the total for every row.
  top_ages <- age_counts %>%
    mutate(percent = round(value / sum(value) * 100, 1)) %>%
    arrange(desc(percent)) %>%
    head(10)

  highchart() %>%
    hc_add_series(top_ages, hcaes(x = age, y = percent), type = "pie") %>%
    hc_add_theme(hc_theme_flatdark()) %>%
    hc_title(text = "Percent of Age of Customer")
}



```



```{r}
# Exploratory code, kept commented out so it does not run when the dashboard
# is knitted. It loads the full review file, renames the columns, counts the
# reviews per clothing ID and keeps the five IDs with the most reviews.
# NOTE(review): presumably this is how the five product IDs used by the
# dashboard pages were chosen -- confirm before relying on it.

#Womens_Clothing_E_Commerce_Reviews <- read_csv("C:/Users/somang.han/Downloads/womens-ecommerce-clothing-reviews/Womens Clothing E-Commerce Reviews.csv")

#colnames(Womens_Clothing_E_Commerce_Reviews) <- c("num_comments","clothing_id","age","title","review_text","rating","recommended_ind","positive_feedback_count","division_name","department_name","class_name")

#head(Womens_Clothing_E_Commerce_Reviews)

# Review counts of the five most-reviewed clothing IDs:
# sort(table(Womens_Clothing_E_Commerce_Reviews$clothing_id),decreasing = T)[1:5]

# IDs of those five products, then every review row that belongs to them:
#top_5_review_product_id=names(sort(table(Womens_Clothing_E_Commerce_Reviews$clothing_id),decreasing = T)[1:5])
#top_5_review_product_info=Womens_Clothing_E_Commerce_Reviews[Womens_Clothing_E_Commerce_Reviews$clothing_id  %in%  top_5_review_product_id,]

```


Clothing ID 862 Product Review Summary  
=======================================================================


Column {data-width=600}
-----------------------------------------------------------------------

### Wordcloud of the Review 

```{r}
word_cloud_fun(review_reader(862))
```



###  Age Distribution of Customers 

```{r}
age_graph(original_review(862))
```





Column {.tabset data-height=1000}
-----------------------------------------------------------------------


### Bigram 

```{r fig.height=5}
bi_gram_out(review_reader(862))
```

### Trigram 

```{r fig.height=5}

tri_gram_out(review_reader(862))
```




Clothing ID 1078 Product Review Summary  
=======================================================================


Column {data-width=600}
-----------------------------------------------------------------------

### Wordcloud of the Review 

```{r}
word_cloud_fun(review_reader(1078))
```



###  Age Distribution of Customers 

```{r}
age_graph(original_review(1078))
```





Column {.tabset data-height=1000}
-----------------------------------------------------------------------


### Bigram 

```{r fig.height=5}
bi_gram_out(review_reader(1078))
```

### Trigram 

```{r fig.height=5}

tri_gram_out(review_reader(1078))
```


Clothing ID 1094 Product Review Summary  
=======================================================================


Column {data-width=600}
-----------------------------------------------------------------------

### Wordcloud of the Review 

```{r}
word_cloud_fun(review_reader(1094))
```



###  Age Distribution of Customers 

```{r}
age_graph(original_review(1094))
```





Column {.tabset data-height=1000}
-----------------------------------------------------------------------


### Bigram 

```{r fig.height=5}
bi_gram_out(review_reader(1094))
```

### Trigram 

```{r fig.height=5}

tri_gram_out(review_reader(1094))
```



Clothing ID 1081 Product Review Summary  
=======================================================================


Column {data-width=600}
-----------------------------------------------------------------------

### Wordcloud of the Review 

```{r}
word_cloud_fun(review_reader(1081))
```



###  Age Distribution of Customers 

```{r}
age_graph(original_review(1081))
```





Column {.tabset data-height=1000}
-----------------------------------------------------------------------


### Bigram 

```{r fig.height=5}
bi_gram_out(review_reader(1081))
```

### Trigram 

```{r fig.height=5}

tri_gram_out(review_reader(1081))
```


Clothing ID 872 Product Review Summary  
=======================================================================


Column {data-width=600}
-----------------------------------------------------------------------

### Wordcloud of the Review 

```{r}
word_cloud_fun(review_reader(872))
```



###  Age Distribution of Customers 

```{r}
age_graph(original_review(872))
```





Column {.tabset data-height=1000}
-----------------------------------------------------------------------


### Bigram 

```{r fig.height=5}
bi_gram_out(review_reader(872))
```

### Trigram 

```{r fig.height=5}

tri_gram_out(review_reader(872))
```